library(forecast)
library(dplyr)
library(tidyr)
library(stringr)
library(lubridate)
library(ggplot2)
theme_set(theme_bw())
library(weatherGen)
data(climate)
# set RNG seed for reproducibility
set.seed(2345)
Get list of files with each file containing a monthly climate dataset for one grid point.
DATA_DIR <- '/Users/jeff/Projects/UMass/virtue/data/mon/east'
files <- list.files(DATA_DIR) %>%
str_split('_') %>%
do.call(rbind, .) %>%
as.data.frame(stringsAsFactors=FALSE) %>%
mutate(FILE=paste(V1, V2, V3, sep="_")) %>%
select(FILE, LAT=V2, LON=V3) %>%
mutate(LAT=as.numeric(LAT), LON=as.numeric(LON))
Here is a map of the climate data grid points.
library(ggmap)
map <- get_map(location=c(lon=mean(range(files$LON)), lat=mean(range(files$LAT))),
zoom=4, maptype="satellite", color='bw')
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=36.625,-78.8125&zoom=4&size=%20640x640&scale=%202&maptype=satellite&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
ggmap(map, darken=c(0.25, "white"), extent="device") +
geom_point(aes(x=LON, y=LAT), data=files, color='red', size=1)
Zoomed into new england. The green points will be used to compute a regional average.
map <- get_map(location=c(lon=-72, lat=42),
zoom=7, maptype="satellite", color='bw')
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=42,-72&zoom=7&size=%20640x640&scale=%202&maptype=satellite&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
ggmap(map, darken=c(0.25, "white"), extent="device") +
geom_point(aes(x=LON, y=LAT, color=SELECT),
data=mutate(files, SELECT=(LAT>=41 & LAT<=43 & LON>=-74 & LON<=-70)), size=2) +
geom_vline(aes(xintercept=LON), color='green', linetype=2,
data=data.frame(LON=seq(-74, -70))) +
geom_hline(aes(yintercept=LAT), color='green', linetype=2,
data=data.frame(LAT=seq(41, 43))) +
scale_color_manual(values=c("TRUE"="green", "FALSE"="red"), guide=FALSE)
Extract the points within the selected region.
clim.raw <- filter(files, LAT>=41, LAT<=43, LON>=-74, LON<=-70) %>%
apply(1, function(f) {
x <- read.table(file.path(DATA_DIR, f[['FILE']]))
names(x) <- c("YEAR", "MONTH", "PRCP","TMAX","TMIN","TAVG")
x$LAT <- f['LAT']
x$LON <- f['LON']
return(x)
}) %>%
do.call(rbind, .)
clim.mon <- gather(clim.raw, VAR, VALUE, PRCP:TAVG) %>%
mutate(VALUE=as.numeric(VALUE))
clim.yr <- group_by(clim.raw, YEAR, LAT, LON) %>%
summarise(PRCP=sum(PRCP),
TMAX=mean(TMAX),
TMIN=mean(TMIN),
TAVG=mean(TAVG)) %>%
gather(VAR, VALUE, PRCP:TAVG)
Compute the regional average monthly value of each climate variable.
clim.reg.mon <- clim.mon %>%
group_by(YEAR, MONTH, VAR) %>%
summarise(MEAN=mean(VALUE),
SD=sd(VALUE))
mutate(clim.reg.mon, DATE=ymd(paste(YEAR,MONTH,1,sep='-'))) %>%
ggplot(aes(DATE, MEAN)) +
geom_ribbon(aes(ymin=MEAN-SD, ymax=MEAN+SD), fill='grey80') +
geom_line(color='red') +
facet_wrap(~VAR, ncol=1, scales='free_y') +
labs(x="Month/Year", y="Mean +/- StDev")
Compute the regional average annual value of each climate variable.
clim.reg.yr <- clim.yr %>%
group_by(VAR, YEAR) %>%
summarise(MEAN=mean(VALUE),
SD=sd(VALUE))
ggplot(clim.reg.yr, aes(YEAR, MEAN)) +
geom_ribbon(aes(ymin=MEAN-SD, ymax=MEAN+SD), fill='grey80') +
geom_line(color='red') +
facet_wrap(~VAR, ncol=1, scales='free_y') +
labs(x="Year", y="Mean +/- StDev")
Convert the regional average datasets wide format.
clim.reg.mon <- select(clim.reg.mon, -SD) %>%
spread(VAR, MEAN)
head(clim.reg.mon)
## Source: local data frame [6 x 6]
##
## YEAR MONTH PRCP TMAX TMIN TAVG
## 1 1949 1 117.52 4.243 -4.818 7.308
## 2 1949 2 77.78 4.882 -6.123 6.925
## 3 1949 3 48.22 8.025 -3.279 8.426
## 4 1949 4 98.86 15.229 2.510 6.285
## 5 1949 5 108.86 21.098 7.175 5.046
## 6 1949 6 23.19 27.450 13.578 5.312
clim.reg.yr <- select(clim.reg.yr, -SD) %>%
spread(VAR, MEAN)
head(clim.reg.yr)
## Source: local data frame [6 x 5]
##
## YEAR PRCP TMAX TMIN TAVG
## 1 1949 912.9 16.15 4.105 6.266
## 2 1950 1075.9 14.42 2.871 6.151
## 3 1951 1281.2 14.87 3.502 6.087
## 4 1952 1168.9 15.11 3.731 6.409
## 5 1953 1329.0 16.01 4.151 5.943
## 6 1954 1291.9 14.62 3.333 6.300
Perform wavelet analysis on regional average annual precipitation timeseries.
wave <- wavelet_analysis(clim.reg.yr$PRCP, sig.level=0.90, noise.type='white')
par(mfrow=c(1,2))
plot(wave, plot.cb=TRUE, plot.phase=FALSE)
plot(wave$gws, wave$period, type="b",
xlab="Global Wavelet Spectrum", ylab="Fourier Period (Years)",
log="y", ylim=rev(range(wave$period)),
xlim=range(c(wave$gws, wave$gws.sig$signif)))
lines(wave$gws.sig$signif, wave$period, lty=2, col='red')
Although there are some significant wavelet periods, we’ll use a simple arima model of the regional annual precipitation instead of arima models of the wavelet components.
models <- list(PRCP=auto.arima(clim.reg.yr$PRCP,
max.p=2,max.q=2,max.P=0,max.Q=0,
stationary=TRUE))
models[["PRCP"]]
## Series: clim.reg.yr$PRCP
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 intercept
## 0.218 1183.87
## s.e. 0.126 26.69
##
## sigma^2 estimated as 27243: log likelihood=-404.6
## AIC=815.2 AICc=815.6 BIC=821.6
This figure shows a 20-year forecast of the arima model reflecting no low-frequency oscillations (since we are not using the wavelet decomposition).
par(mfrow=c(1,1))
plot(forecast(models[['PRCP']], h=20), main="ARIME Forecast of Regional Annual Precip",
xlab="Timestep (yr)", ylab='Annual Precip (mm/yr)')
Generate 50 simulations of annual precipitation using the AR1 model of the regional annual precipitation.
sim.prcp <- weatherGen::mc_arimas(models=models, n=nrow(clim.reg.yr), n.iter=50)
str(sim.prcp)
## List of 4
## $ x : num [1:62, 1:50] 1063 1355 1381 1024 1460 ...
## $ gws : num [1:21, 1:50] 9569 14863 21023 27508 23138 ...
## $ x.stat : num [1:62, 1:3] 1168 1178 1188 1182 1185 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:3] "MEAN" "Q025" "Q975"
## $ gws.stat: num [1:21, 1:3] 12705 20034 21128 22864 24353 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:3] "MEAN" "Q025" "Q975"
Compare the GWS power and the mean, standard deviation, and skewness statistics across these simulations (red point is the overall mean, boxplots are the distribution across simulations) to regional annual precipitation timeseries (blue point).
par(mfrow=c(2,2))
x1 <- 1
x2 <- min(length(wave$period), nrow(sim.prcp$gws))
ymin <- min(wave$period[x1:x2], wave$gws[x1:x2],
sim.prcp$gws.stat[x1:x2, 'MEAN'],
sim.prcp$gws.stat[x1:x2, 'Q025'])
ymax <- max(wave$period[x1:x2], wave$gws[x1:x2],
sim.prcp$gws.stat[x1:x2, 'MEAN'],
sim.prcp$gws.stat[x1:x2, 'Q975'])
plot(wave$period[x1:x2], wave$gws[x1:x2],
type="n", ylim=c(ymin,ymax), xlab="Period (years)", ylab="",
main="", log="y")
mtext(side=2, expression(paste("Power (",mm^2,")")), line=2.5)
polygon(c(wave$period[x1:x2],
rev(wave$period[x1:x2])),
c(sim.prcp$gws.stat[x1:x2, 'Q025'],
rev(sim.prcp$gws.stat[x1:x2, 'Q975'])),
col="grey")
lines(wave$period[x1:x2], wave$gws[x1:x2])
lines(wave$period[x1:x2], sim.prcp$gws.stat[x1:x2, 'MEAN'], lty=2, col="blue")
lines(wave$period[x1:x2], wave$gws.sig$signif[x1:x2], col="red", lty=3)
boxplot(colMeans(sim.prcp$x), main="Mean")
points(mean(clim.reg.yr$PRCP),col="red",pch=19)
points(mean(colMeans(sim.prcp$x)), col="blue", pch=19)
boxplot(apply(sim.prcp$x, 2, sd), main="Standard Deviation")
points(sd(clim.reg.yr$PRCP), col="red", pch=19)
points(mean(apply(sim.prcp$x, 2, sd)), col="blue", pch=19)
boxplot(apply(sim.prcp$x, 2, skewness), main="Skew")
points(skewness(clim.reg.yr$PRCP), col="red", pch=19)
points(mean(apply(sim.prcp$x, 2, skewness)),col="blue",pch=19)
This figure shows the simulated and observed regional-average annual precipitation. The simulations were generated using the AR1 model described above.
as.data.frame(sim.prcp$x) %>%
mutate(TIMESTEP=row_number()) %>%
gather(TRIAL, VALUE, -TIMESTEP) %>%
ggplot(aes(TIMESTEP, VALUE)) +
geom_line(aes(group=TRIAL), alpha=0.3) +
stat_summary(fun.y=mean, geom='line', color='red', size=1) +
geom_line(aes(x=TIMESTEP, y=PRCP), data=mutate(clim.reg.yr, TIMESTEP=row_number()), color='blue', size=1) +
labs(x="Year", y="Annual Precipitation (mm/yr)",
title="WARM Annual Precipitation Simulations\nRed Line=Mean of Simulations, Blue Line=Observd Regional Mean")
First, select a random location from the complete climate dataset.
clim.locs <- select(clim.yr, LAT, LON) %>%
unique()
loc <- clim.locs[sample(nrow(clim.locs), size=1),] %>% as.numeric
names(loc) <- c('LAT', 'LON')
loc
## LAT LON
## 42.19 -72.19
Now extract the monthly and annual climate datasets for this location.
clim.loc.yr <- filter(clim.yr, LAT==loc[['LAT']], LON==loc[['LON']]) %>%
spread(VAR, VALUE) %>%
arrange(YEAR)
clim.loc.mon <- filter(clim.mon, LAT==loc[['LAT']], LON==loc[['LON']]) %>%
spread(VAR, VALUE) %>%
arrange(YEAR, MONTH)
Compute the euclidian distance between a single simulated annual precipitation value and each observed year. The simulated annual precipitation is for the first year in the first simulation trial.
j <- 1 # year number
samp <- 1 # simulation trial number
# compute distances between simulated WARM timeseries and observed regional precip
distances <- cbind(clim.reg.yr, DISTANCE=sqrt((sim.prcp$x[j,samp] - clim.reg.yr$PRCP)^2))
ggplot(distances, aes(YEAR, PRCP)) +
geom_line() +
geom_point(aes(color=DISTANCE), size=3) +
geom_hline(yint=sim.prcp$x[j,samp], linetype=2, color='red') +
geom_text(x=max(distances$YEAR), y=sim.prcp$x[j,samp], label="Sim", hjust=1, vjust=-0.5) +
scale_color_gradient(low='green', high='red') +
labs(x="Year", y="Annual Precipitation (mm/yr)",
title="Regional and Current Simulation Annual Precipitation")
Select the eight years from the observed regional annual simulation that are the most similar to the first simulation trial and year.
distances <- mutate(distances,
IN_TOP_8=rank(DISTANCE) %in% seq(1,8))
distances %>%
ggplot(aes(YEAR, PRCP)) +
geom_line() +
geom_point(aes(color=DISTANCE), size=3) +
geom_hline(yint=sim.prcp$x[j,samp], linetype=2, color='red') +
geom_point(mapping=aes(alpha=IN_TOP_8), color='black', shape=1, size=4) +
geom_text(aes(x=YEAR, y=PRCP, label=YEAR), data=filter(distances, IN_TOP_8), vjust=-1) +
geom_text(x=max(distances$YEAR), y=sim.prcp$x[j,samp], label="Sim", hjust=1, vjust=-0.5) +
scale_alpha_manual(values=c("TRUE"=1, "FALSE"=0), guide=FALSE) +
scale_color_gradient(low='green', high='red') +
labs(x="Year", y="Annual Precipitation (mm)", title="Regional Annual Precipitation with 8 Nearest Neighbors Selected")
Extract the observed regional precipitation for the candidate years and compute the sampling weights as inverse distance-squared weighted.
distances.top <- filter(distances, IN_TOP_8) %>%
mutate(WEIGHT=(1/DISTANCE)^2,
WEIGHT=WEIGHT/sum(WEIGHT))
distances.top %>%
ggplot(aes(PRCP, WEIGHT)) +
geom_point() +
geom_vline(xint=sim.prcp$x[j,samp], linetype=2, color='red') +
geom_text(aes(label=YEAR), hjust=-0.1) +
geom_text(x=sim.prcp$x[j,samp], y=0.4, label="Sim Precip", hjust=-0.1) +
labs(x="Annual Precipitation (mm)", y="Sample Weight")
Now we can randomly sample from the 8 nearest neighbors using the sampling weights based on the inverse distance squared. We’ll do this 1000 times and compare the frequency of the samples to the sample weights.
sampled.years <- sample(distances.top$YEAR,
size=1000,
prob=distances.top$WEIGHT,
replace=TRUE)
sampled.years.tbl <- prop.table(table(sampled.years))
merge(distances.top,
data.frame(YEAR=names(sampled.years.tbl),
FREQ=as.vector(sampled.years.tbl)),
all.x=TRUE) %>%
select(YEAR, WEIGHT, FREQ) %>%
ggplot(aes(WEIGHT, FREQ)) +
geom_point(size=3) +
geom_abline(linetype=2) +
labs(x="Sample Weight", y="Sampled Frequency")
Now if we just sample for one year.
sampled.year <- sample(distances.top$YEAR, size=1, prob=distances.top$WEIGHT, replace=TRUE)
sampled.year
## [1] 1988
Then we can extract the observed monthly climate data for the sampled year from the observed dataset of the selected location.
filter(clim.loc.mon, YEAR==sampled.year)
## YEAR MONTH LAT LON PRCP TMAX TMIN TAVG
## 1 1988 1 42.19 -72.19 69.50 -0.79 -14.22 7.47
## 2 1988 2 42.19 -72.19 91.22 2.19 -10.25 7.72
## 3 1988 3 42.19 -72.19 67.57 7.65 -4.45 7.30
## 4 1988 4 42.19 -72.19 90.10 11.46 1.60 7.16
## 5 1988 5 42.19 -72.19 83.15 19.66 7.44 4.71
## 6 1988 6 42.19 -72.19 30.23 24.21 10.75 5.48
## 7 1988 7 42.19 -72.19 195.85 27.53 16.17 4.91
## 8 1988 8 42.19 -72.19 56.88 27.33 16.02 5.23
## 9 1988 9 42.19 -72.19 48.98 21.52 7.96 5.19
## 10 1988 10 42.19 -72.19 73.50 13.09 1.31 7.04
## 11 1988 11 42.19 -72.19 205.03 10.55 -1.17 8.80
## 12 1988 12 42.19 -72.19 30.42 2.41 -8.64 8.66
Given the selected location, repeat the nearest neighbor process for each of the num_sim simulations and num_year years, and store the result in a 3-dimensional array.
num_sim <- ncol(sim.prcp$x)
num_year <- nrow(sim.prcp$x) # number of composited years
num_months <- num_year*12
climate_variables <- select(clim.reg.yr, -YEAR) %>%
names()
sampled.year <- array(NA, c(num_year, num_sim))
gen.loc.mon.arr <- array(NA, c(num_months, length(climate_variables), num_sim))
# loop through simulations (trials)
for (samp in 1:num_sim) {
# loop through years
for (j in 1:num_year) {
# compute distances between simulated WARM timeseries and observed regional precip
distances <- mutate(clim.reg.yr,
DISTANCE=sqrt((sim.prcp$x[j, samp] - clim.reg.yr$PRCP)^2),
IN_TOP_8=rank(DISTANCE) %in% seq(1,8))
distances.top <- filter(distances, IN_TOP_8) %>%
mutate(WEIGHT=(1/DISTANCE)^2,
WEIGHT=WEIGHT/sum(WEIGHT))
sampled.year[j,samp] <- sample(distances.top$YEAR,
size=1,
prob=distances.top$WEIGHT,
replace=FALSE)
row.idx <- ((1+12*(j-1)):(12+12*(j-1)))
gen.loc.mon.arr[row.idx, , samp] <- data.matrix(filter(clim.loc.mon,
YEAR==sampled.year[j, samp]) %>%
select(PRCP, TMAX, TMIN, TAVG))
}
}
Now convert the 3-dim array to a list of dataframes with each element in the list containing the simulated data for a single variable.
gen.loc.mon <- sapply(climate_variables, function(v) {
i.var <- which(climate_variables==v)
df <- as.data.frame(gen.loc.mon.arr[,i.var,]) %>%
mutate(TIMESTEP=row_number(),
SIM_YEAR=rep(seq(1, num_year), each=12),
SIM_MONTH=rep(seq(1, 12), times=num_year),
VAR=v) %>%
gather(TRIAL, VALUE, -TIMESTEP, -SIM_YEAR, -SIM_MONTH, -VAR) %>%
mutate(TRIAL=as.numeric(TRIAL))
return(list(df))
})
str(gen.loc.mon)
## List of 4
## $ PRCP:'data.frame': 37200 obs. of 6 variables:
## ..$ TIMESTEP : int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ SIM_YEAR : int [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_MONTH: int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VAR : chr [1:37200] "PRCP" "PRCP" "PRCP" "PRCP" ...
## ..$ TRIAL : num [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ VALUE : num [1:37200] 69.5 91.2 67.6 90.1 83.2 ...
## $ TMAX:'data.frame': 37200 obs. of 6 variables:
## ..$ TIMESTEP : int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ SIM_YEAR : int [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_MONTH: int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VAR : chr [1:37200] "TMAX" "TMAX" "TMAX" "TMAX" ...
## ..$ TRIAL : num [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ VALUE : num [1:37200] -0.79 2.19 7.65 11.46 19.66 ...
## $ TMIN:'data.frame': 37200 obs. of 6 variables:
## ..$ TIMESTEP : int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ SIM_YEAR : int [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_MONTH: int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VAR : chr [1:37200] "TMIN" "TMIN" "TMIN" "TMIN" ...
## ..$ TRIAL : num [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ VALUE : num [1:37200] -14.22 -10.25 -4.45 1.6 7.44 ...
## $ TAVG:'data.frame': 37200 obs. of 6 variables:
## ..$ TIMESTEP : int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ SIM_YEAR : int [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_MONTH: int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VAR : chr [1:37200] "TAVG" "TAVG" "TAVG" "TAVG" ...
## ..$ TRIAL : num [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ VALUE : num [1:37200] 7.47 7.72 7.3 7.16 4.71 5.48 4.91 5.23 5.19 7.04 ...
Aggregate the generated timeseries to compute annual values or each variable.
gen.loc.yr <- lapply(gen.loc.mon, function(df) {
if ('PRCP' %in% unique(df$VAR)) {
df.yr <- group_by(df, VAR, TRIAL, SIM_YEAR) %>%
summarise(VALUE=sum(VALUE))
} else {
df.yr <- group_by(df, VAR, TRIAL, SIM_YEAR) %>%
summarise(VALUE=mean(VALUE))
}
df.yr <- df.yr %>%
ungroup() %>%
as.data.frame()
return(df.yr)
})
str(gen.loc.yr)
## List of 4
## $ PRCP:'data.frame': 3100 obs. of 4 variables:
## ..$ VAR : chr [1:3100] "PRCP" "PRCP" "PRCP" "PRCP" ...
## ..$ TRIAL : num [1:3100] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_YEAR: int [1:3100] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VALUE : num [1:3100] 1042 1300 1408 1065 1451 ...
## $ TMAX:'data.frame': 3100 obs. of 4 variables:
## ..$ VAR : chr [1:3100] "TMAX" "TMAX" "TMAX" "TMAX" ...
## ..$ TRIAL : num [1:3100] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_YEAR: int [1:3100] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VALUE : num [1:3100] 13.9 15.1 14.4 14.1 14.5 ...
## $ TMIN:'data.frame': 3100 obs. of 4 variables:
## ..$ VAR : chr [1:3100] "TMIN" "TMIN" "TMIN" "TMIN" ...
## ..$ TRIAL : num [1:3100] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_YEAR: int [1:3100] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VALUE : num [1:3100] 1.88 3.48 2.83 1.66 2.35 ...
## $ TAVG:'data.frame': 3100 obs. of 4 variables:
## ..$ VAR : chr [1:3100] "TAVG" "TAVG" "TAVG" "TAVG" ...
## ..$ TRIAL : num [1:3100] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_YEAR: int [1:3100] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VALUE : num [1:3100] 6.64 6.44 6.62 6.56 6.39 ...
This figure shows all the weather generator simulations compared to the observed climate dataset at the selected location.
do.call(rbind, gen.loc.yr) %>%
mutate(VAR=factor(VAR)) %>%
ggplot() +
geom_line(aes(SIM_YEAR, VALUE, group=TRIAL), alpha=0.5) +
geom_line(aes(SIM_YEAR, VALUE),
data=mutate(clim.reg.yr, SIM_YEAR=row_number()) %>%
gather(VAR, VALUE, PRCP:TAVG),
color='red') +
facet_wrap(~VAR, scales='free_y') +
labs(x="Simulation Year", y="Value", title="All Generated Datasets with Observed Location Dataset ")
This figure shows only the first weather generator simulation compared to the observed climate dataset at the selected location.
do.call(rbind, gen.loc.yr) %>%
mutate(VAR=factor(VAR)) %>%
filter(TRIAL==1) %>%
ggplot() +
geom_line(aes(SIM_YEAR, VALUE, group=TRIAL), alpha=0.5) +
geom_line(aes(SIM_YEAR, VALUE),
data=mutate(clim.reg.yr, SIM_YEAR=row_number()) %>%
gather(VAR, VALUE, PRCP:TAVG),
color='red') +
facet_wrap(~VAR, scales='free_y') +
labs(x="Simulation Year", y="Value", title="Single Generated Dataset with Observed Location Dataset ")
The following three figures compare the mean, standard deviation, and skewness of the annual precipitation timeseries between the weather generator simulations and the observed climate dataset at the selected location. The boxplots show the distribution of each statistic across the simulations, the red point shows the mean of the statistic for the simulations, and the blue point shows the statistic value for the observed annual timeseries.
do.call(rbind, gen.loc.yr) %>%
mutate(VAR=factor(VAR)) %>%
group_by(VAR, TRIAL) %>%
summarise(MEAN=mean(VALUE)) %>%
ggplot(aes(x=1, y=MEAN)) +
geom_boxplot() +
stat_summary(fun.y=mean, geom="point", color="red", size=3) +
geom_point(aes(x=1, y=MEAN),
data=gather(clim.loc.yr, VAR, VALUE, PRCP:TAVG) %>%
group_by(VAR) %>%
summarise(MEAN=mean(VALUE)),
color='blue', size=3) +
facet_wrap(~VAR, scales='free_y') +
labs(x="", y="Mean", title="Mean of Annual Precipitation for Generated (Red) and Observed (Blue)") +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())
do.call(rbind, gen.loc.yr) %>%
mutate(VAR=factor(VAR)) %>%
group_by(VAR, TRIAL) %>%
summarise(SD=sd(VALUE)) %>%
ggplot(aes(x=1, y=SD)) +
geom_boxplot() +
stat_summary(fun.y=mean, geom="point", color="red", size=3) +
geom_point(aes(x=1, y=SD),
data=gather(clim.loc.yr, VAR, VALUE, PRCP:TAVG) %>%
group_by(VAR) %>%
summarise(SD=sd(VALUE)),
color='blue', size=3) +
facet_wrap(~VAR, scales='free_y') +
labs(x="", y="Standard Deviation", title="StDev of Annual Precipitation for Generated (Red) and Observed (Blue)") +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())
do.call(rbind, gen.loc.yr) %>%
mutate(VAR=factor(VAR)) %>%
group_by(VAR, TRIAL) %>%
summarise(SKEW=skewness(VALUE)) %>%
ggplot(aes(x=1, y=SKEW)) +
geom_boxplot() +
stat_summary(fun.y=mean, geom="point", color="red", size=3) +
geom_point(aes(x=1, y=SKEW),
data=gather(clim.loc.yr, VAR, VALUE, PRCP:TAVG) %>%
group_by(VAR) %>%
summarise(SKEW=skewness(VALUE)),
color='blue', size=3) +
facet_wrap(~VAR, scales='free_y') +
labs(x="", y="Skewness", title="Skewness of Annual Precipitation for Generated (Red) and Observed (Blue)") +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())
sessionInfo()
## R version 3.1.1 (2014-07-10)
## Platform: x86_64-apple-darwin13.1.0 (64-bit)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] splines grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] weatherGen_0.1 devtools_1.5 Hmisc_3.14-4 Formula_1.1-1
## [5] survival_2.37-7 lattice_0.20-29 mapproj_1.2-2 maps_2.3-7
## [9] ggmap_2.3 ggplot2_1.0.0 lubridate_1.3.3 stringr_0.6.2
## [13] tidyr_0.1 dplyr_0.2 forecast_5.4 timeDate_3010.98
## [17] zoo_1.7-11
##
## loaded via a namespace (and not attached):
## [1] assertthat_0.1 biwavelet_0.17.3 car_2.0-20
## [4] cluster_1.15.2 colorspace_1.2-4 digest_0.6.4
## [7] evaluate_0.5.5 fields_7.1 formatR_0.10
## [10] fracdiff_1.4-2 gtable_0.1.2 htmltools_0.2.4
## [13] httr_0.3 hydromad_0.9-20 knitr_1.6
## [16] labeling_0.2 latticeExtra_0.6-26 magrittr_1.0.1
## [19] MASS_7.3-33 memoise_0.2.1 munsell_0.4.2
## [22] nnet_7.3-8 parallel_3.1.1 plyr_1.8.1
## [25] png_0.1-7 proto_0.3-10 quadprog_1.5-5
## [28] RColorBrewer_1.0-5 Rcpp_0.11.2 RCurl_1.95-4.1
## [31] reshape2_1.4 RgoogleMaps_1.2.0.6 rjson_0.2.14
## [34] RJSONIO_1.2-0.2 rmarkdown_0.2.50 scales_0.2.4
## [37] spam_0.41-0 tools_3.1.1 tseries_0.10-32
## [40] whisker_0.3-2 yaml_2.1.13